# load required libraries
library("lme4")
library("ggplot2")
library("here")
library("tidyverse")
library("emmeans")
library("brms")
library("DHARMa")
library("glmmTMB")
library("performance")
library("scales")
library("grateful")
A pseudonymized/codified dataset was used as a precaution due to ethical considerations (patient-level clinical data). The full dataset is available upon request (see manuscript data availability statement). For the in vitro assays specifically, age and demographic location variables have been removed, in addition to codifying the levels of the variables not of direct interest.
# load the data
df <- read_csv(here("./data/ivSCAs_codified.csv"), show_col_types = FALSE) %>%
rename(HBB.genotype = `HBB Genotype`) %>%
rename(Sample.type = `Sample type`) %>%
select(ID, Group, Sample.type, HBB.genotype, Bloodgroup, Reticulocytes, Gender, Symptomatic,
# Age, # omitted due to data sensitivity
# Village, Ethnicity, # omitted due to data sensitivity + missing values
`Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Retics_Reticulocytes`,
`Raw counts_Retics_total RBCs`,
SCR_CHOLINE_Gen1_35to40hpi, # keep SCR for plots
# no choline models
`Raw counts_iRBCs_NO CHOLINE_Gen1_25to30hpi`,
`Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi`,
`Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi`,
# different timepoints
`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`,
`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`,
`Raw counts_iRBCs_NO CHOLINE_Gen1_25to30hpi`,
`Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi`,
`Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi`,
# late stage
`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi`,
`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`,
`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`,
`Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`,
# parasitemia
`Raw counts_RBC_CHOLINE_Gen0_0to5hpi`,
`Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`,
# reticulocytes
`Raw counts_Retics_Reticulocytes`,
`Raw counts_Retics_total RBCs`,
) %>%
mutate(HBB.genotype = case_when(
HBB.genotype == 0 ~ "HbAA",
HBB.genotype == 1 ~ "HbAC",
HBB.genotype == 2 ~ "HbAS",
HBB.genotype == 3 ~ "HbSS",
HBB.genotype == 4 ~ "HbSC",
)) %>%
filter(!Group == "AA CTRL") %>%
filter_all(any_vars(!is.na(.))) %>% # remove empty lines/samples
mutate(across(c(ID, Group, Sample.type, HBB.genotype, Bloodgroup, Gender,
Symptomatic), as.factor)) %>%
mutate(HBB.genotype = fct_relevel(HBB.genotype, "HbAA")) %>% # set HbAA as baseline reference
mutate(Group = fct_relevel(Group, "AA")) %>% # set HbAA as baseline reference
mutate()
str(df)
## tibble [28 × 28] (S3: tbl_df/tbl/data.frame)
## $ ID : Factor w/ 28 levels "2","3","4","6",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Group : Factor w/ 5 levels "AA","AC","AS",..: 3 3 2 1 2 2 3 4 4 5 ...
## $ Sample.type : Factor w/ 1 level "TEST": 1 1 1 1 1 1 1 1 1 1 ...
## $ HBB.genotype : Factor w/ 5 levels "HbAA","HbAC",..: 3 3 2 1 2 2 3 4 4 5 ...
## $ Bloodgroup : Factor w/ 5 levels "0","1","2","3",..: 1 3 3 5 5 1 5 5 1 2 ...
## $ Reticulocytes : num [1:28] 1.78 1.65 1.35 1 2.48 0.62 2.8 5.71 1.84 4.62 ...
## $ Gender : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 1 2 1 ...
## $ Symptomatic : Factor w/ 2 levels "0","1": 2 1 1 1 2 1 1 1 1 1 ...
## $ Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi : num [1:28] 5473 4956 3198 3413 3199 ...
## $ Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi : num [1:28] 4028 4047 2831 3031 2854 ...
## $ Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi : num [1:28] 1435 904 361 372 332 ...
## $ Raw counts_Retics_Reticulocytes : num [1:28] 19 13 13 11 17 5 21 42 16 43 ...
## $ Raw counts_Retics_total RBCs : num [1:28] 1065 788 965 1098 685 ...
## $ SCR_CHOLINE_Gen1_35to40hpi : num [1:28] 26.3 18.3 11.3 10.9 10.4 ...
## $ Raw counts_iRBCs_NO CHOLINE_Gen1_25to30hpi : num [1:28] 3998 3600 2328 1694 1669 ...
## $ Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi: num [1:28] 2184 2385 1601 2025 1671 ...
## $ Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi : num [1:28] 1362 1120 558 830 827 ...
## $ Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi : num [1:28] 5760 5160 3904 2499 2149 ...
## $ Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi : num [1:28] 4308 4266 3568 2263 1945 ...
## $ Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi : num [1:28] 1440 886 332 236 202 277 195 51 102 103 ...
## $ Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi: num [1:28] 2643 2474 1786 1293 1191 ...
## $ Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi : num [1:28] 1348 1123 533 398 476 ...
## $ Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi : num [1:28] 2229 1865 1725 797 649 ...
## $ Raw counts_Late stages_CHOLINE_Gen0_35to40hpi : num [1:28] 891 613 1065 425 339 ...
## $ Raw counts_Late stages_CHOLINE_Gen1_25to30hpi : num [1:28] 301 302 482 167 90 62 29 29 52 64 ...
## $ Raw counts_Late stages_CHOLINE_Gen1_35to40hpi : num [1:28] 795 786 908 1164 1020 ...
## $ Raw counts_RBC_CHOLINE_Gen0_0to5hpi : num [1:28] 950 993 888 93494 95898 ...
## $ Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi : num [1:28] 8 12 20 144 125 402 140 186 705 232 ...
summary(df)
## ID Group Sample.type HBB.genotype Bloodgroup Reticulocytes
## 2 : 1 AA: 5 TEST:28 HbAA: 5 0:13 Min. :0.570
## 3 : 1 AC: 5 HbAC: 5 1: 4 1st Qu.:1.333
## 4 : 1 AS: 4 HbAS: 4 2: 3 Median :1.715
## 6 : 1 SC: 2 HbSC: 2 3: 3 Mean :2.731
## 7 : 1 SS:12 HbSS:12 4: 5 3rd Qu.:4.515
## 8 : 1 Max. :6.720
## (Other):22
## Gender Symptomatic Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi
## 0:13 0:22 Min. : 952
## 1:15 1: 6 1st Qu.:2283
## Median :3198
## Mean :3081
## 3rd Qu.:3704
## Max. :5987
##
## Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi
## Min. : 854
## 1st Qu.:2000
## Median :2842
## Mean :2709
## 3rd Qu.:3208
## Max. :5330
##
## Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi Raw counts_Retics_Reticulocytes
## Min. : 60.0 Min. : 5.00
## 1st Qu.: 161.2 1st Qu.: 13.00
## Median : 263.0 Median : 20.00
## Mean : 361.9 Mean : 34.43
## 3rd Qu.: 505.2 3rd Qu.: 50.25
## Max. :1435.0 Max. :115.00
##
## Raw counts_Retics_total RBCs SCR_CHOLINE_Gen1_35to40hpi
## Min. : 528.0 Min. : 4.83
## 1st Qu.: 881.2 1st Qu.: 7.06
## Median :1250.5 Median :10.43
## Mean :1221.3 Mean :10.92
## 3rd Qu.:1473.0 3rd Qu.:14.06
## Max. :1893.0 Max. :26.27
##
## Raw counts_iRBCs_NO CHOLINE_Gen1_25to30hpi
## Min. : 121
## 1st Qu.:1154
## Median :1466
## Mean :1751
## 3rd Qu.:2291
## Max. :4081
##
## Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi
## Min. : 735
## 1st Qu.:1429
## Median :1730
## Mean :1886
## 3rd Qu.:2228
## Max. :4146
##
## Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi
## Min. : 75.0
## 1st Qu.: 259.0
## Median : 491.0
## Mean : 696.6
## 3rd Qu.: 971.8
## Max. :2283.0
##
## Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi
## Min. : 570
## 1st Qu.:1592
## Median :2048
## Mean :2481
## 3rd Qu.:3022
## Max. :5760
##
## Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi
## Min. : 516
## 1st Qu.:1366
## Median :1882
## Mean :2186
## 3rd Qu.:2800
## Max. :5184
##
## Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi
## Min. : 51.0
## 1st Qu.: 117.0
## Median : 198.5
## Mean : 289.2
## 3rd Qu.: 324.5
## Max. :1440.0
##
## Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi
## Min. : 90.0
## 1st Qu.: 925.8
## Median :1194.5
## Mean :1349.8
## 3rd Qu.:1731.5
## Max. :2851.0
##
## Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi
## Min. : 17.0
## 1st Qu.: 168.8
## Median : 240.5
## Mean : 396.0
## 3rd Qu.: 544.0
## Max. :1348.0
##
## Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi
## Min. : 521
## 1st Qu.: 674
## Median : 1725
## Mean :31611
## 3rd Qu.:90308
## Max. :97941
## NA's :7
## Raw counts_Late stages_CHOLINE_Gen0_35to40hpi
## Min. : 7.0
## 1st Qu.: 44.5
## Median : 126.0
## Mean : 227.7
## 3rd Qu.: 304.2
## Max. :1065.0
## NA's :6
## Raw counts_Late stages_CHOLINE_Gen1_25to30hpi
## Min. : 0.00
## 1st Qu.: 9.00
## Median : 30.50
## Mean : 72.39
## 3rd Qu.: 67.50
## Max. :482.00
##
## Raw counts_Late stages_CHOLINE_Gen1_35to40hpi
## Min. : 2.0
## 1st Qu.: 21.0
## Median : 246.5
## Mean : 389.4
## 3rd Qu.: 547.2
## Max. :2137.0
##
## Raw counts_RBC_CHOLINE_Gen0_0to5hpi Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi
## Min. : 888 Min. : 8.0
## 1st Qu.:87834 1st Qu.: 158.0
## Median :91119 Median : 385.5
## Mean :80932 Mean : 531.6
## 3rd Qu.:93972 3rd Qu.: 718.2
## Max. :96324 Max. :3301.0
##
ggplot(df, aes(x = Group, y = SCR_CHOLINE_Gen1_35to40hpi)) +
geom_boxplot(aes(fill = Group), outlier.size = 2, outlier.colour = "grey",
width = 0.8, position = position_dodge(width = 0.8)) + # Reduce space between boxplots
geom_point(aes(fill = Group), alpha = 0.8, shape = 21, colour= "#115e67a6",
position = position_jitter(width = 0.1, height = 0.1)) + # Bubble plot
scale_fill_manual(values = c("AA CTRL" ="paleturquoise","AA" ="paleturquoise",
"AC" = "palegreen",
"AS" = "thistle",
"SC" = "lightpink1",
"SS"= "peachpuff")) + # Custom color for bubbles and boxplots
theme_minimal() + # Clean theme
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 14), # Increase font size for x-axis ticks
axis.text.y = element_text(size = 14), # Increase font size for y-axis ticks
axis.title.x = element_text(size = 16), # Increase font size for x-axis title
axis.title.y = element_text(size = 16), # Increase font size for y-axis title
strip.text = element_text(size = 12),
panel.grid.major = element_blank(), # Remove major gridlines
panel.grid.minor = element_blank(), # Remove minor gridlines
axis.line = element_line(color = "black") # Add black axis lines
) +
coord_cartesian(ylim = c(0, 50)) # Set y-axis limit from 0 to 50
The data was modelled using generalized linear model using the beta-binomial family and a single dispersion parameter to account for overdispersion.
\[SCR \sim HBB.genotype\]
This model was chosen to keep things interpretable, and take into account the fact that some variables are not covered by a lot of samples (e.g. sparse/uneven representation across levels of blood group). Additionally, since most estimates remain consistent in directionality and magnitude and LRTs tend to favour the simple model, we opt for this parsimonious model.
Although this approach cannot fully exclude confounding or interaction effects, the direction and magnitude of estimated effects were broadly consistent across alternative model specifications (with only the ivSCA test of SCR (+Choline, Gen1 35-40hpi) exhibiting unstable p-values on the border of our chosen significance level). However, because of this, sample size and power limitations should still be kept in mind when interpreting some of these GL(M)M results, with emphasis on effect sizes rather than exact p-values.
See the model comparison section for more details.
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~
## HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 333.0 341.0 -160.5 321.0 22
##
##
## Dispersion parameter for betabinomial family (): 126
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.0539 0.1269 -16.179 < 2e-16 ***
## HBB.genotypeHbAC 0.2242 0.1720 1.304 0.19226
## HBB.genotypeHbAS 0.5495 0.1719 3.196 0.00139 **
## HBB.genotypeHbSC -0.3801 0.2667 -1.425 0.15408
## HBB.genotypeHbSS -0.4474 0.1600 -2.796 0.00518 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model assumptions are OK:
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.243, p-value = 0.242
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.251 0.215 Inf 0.814 1.923 1 1.304 0.7690
## HbAS / HbAA 1.732 0.298 Inf 1.128 2.661 1 3.196 0.0056
## HbSC / HbAA 0.684 0.182 Inf 0.351 1.331 1 -1.425 0.6163
## HbSS / HbAA 0.639 0.102 Inf 0.429 0.953 1 -2.796 0.0207
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
The above table not only shows the estimated odds ratio and its
corresponding p-value, but also the confidence interval of this odds
ratio (= asymp.LCL and asymp.UCL).
These p-values are adjusted for multiple testing using the Bonferroni method.
The estimates are already transformed from the log-scale, so they can be directly interpreted as odds ratios (OR).
The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).
df <- df %>%
mutate(sexual = `Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`) %>%
mutate(asexual = `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) %>%
mutate(total = `Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`)
brm_model <- brm(
bf(
sexual | trials(total) ~ HBB.genotype
),
family = beta_binomial(link = "logit"),
data = df,
iter = 5000,
chains = 8,
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
## Family: beta_binomial
## Links: mu = logit
## Formula: sexual | trials(total) ~ HBB.genotype
## Data: df (Number of observations: 28)
## Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 20000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.06 0.15 -2.37 -1.76 1.00 8856 10188
## HBB.genotypeHbAC 0.22 0.21 -0.19 0.64 1.00 10664 12753
## HBB.genotypeHbAS 0.55 0.21 0.14 0.96 1.00 10149 11274
## HBB.genotypeHbSC -0.42 0.34 -1.14 0.20 1.00 12927 10803
## HBB.genotypeHbSS -0.43 0.19 -0.80 -0.04 1.00 9899 11516
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi 94.96 28.92 47.30 160.14 1.00 14959 13486
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)
The Bayesian estimated odds ratios for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio lower.HPD upper.HPD
## HbAC / HbAA 1.253 0.804 1.847
## HbAS / HbAA 1.728 1.086 2.521
## HbSC / HbAA 0.669 0.279 1.147
## HbSS / HbAA 0.648 0.430 0.923
##
## Point estimate displayed: median
## Results are back-transformed from the log odds ratio scale
## HPD interval probability: 0.95
The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
Overall effect of genotype is significant. However, the pairwise effects appear to be borderline; some p-values become non-significant when correcting for all pairwises comparisons.
kruskal.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` /
(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~
df$HBB.genotype)
##
## Kruskal-Wallis rank sum test
##
## data: df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) by df$HBB.genotype
## Kruskal-Wallis chi-squared = 16.512, df = 4, p-value = 0.002403
pairwise.wilcox.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` /
(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`),
df$HBB.genotype,
p.adjust.method = "holm")
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) and df$HBB.genotype
##
## HbAA HbAC HbAS HbSC
## HbAC 0.762 - - -
## HbAS 0.222 0.762 - -
## HbSC 0.762 0.571 0.667 -
## HbSS 0.215 0.039 0.040 0.791
##
## P value adjustment method: holm
pairwise.wilcox.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` /
(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`),
df$HBB.genotype,
p.adjust.method = "bonferroni")
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) and df$HBB.genotype
##
## HbAA HbAC HbAS HbSC
## HbAC 1.000 - - -
## HbAS 0.317 1.000 - -
## HbSC 1.000 0.952 1.000 -
## HbSS 0.268 0.039 0.044 1.000
##
## P value adjustment method: bonferroni
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) ~
## HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 339.7 347.6 -163.8 327.7 22
##
##
## Dispersion parameter for betabinomial family (): 55.3
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.39455 0.21058 -11.371 < 2e-16 ***
## HBB.genotypeHbAC 0.31997 0.27980 1.144 0.25281
## HBB.genotypeHbAS 0.80608 0.27424 2.939 0.00329 **
## HBB.genotypeHbSC -0.09187 0.40511 -0.227 0.82060
## HBB.genotypeHbSS 0.20006 0.24421 0.819 0.41267
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model assumptions shows slight deviation for residual vs predicted plots. Adding bloodgroup fixes this, but warns about a potential outlier (see below).
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.4538, p-value = 0.228
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.377 0.385 Inf 0.685 2.77 1 1.144 1.0000
## HbAS / HbAA 2.239 0.614 Inf 1.129 4.44 1 2.939 0.0132
## HbSC / HbAA 0.912 0.370 Inf 0.332 2.51 1 -0.227 1.0000
## HbSS / HbAA 1.221 0.298 Inf 0.664 2.25 1 0.819 1.0000
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) ~ HBB.genotype + Bloodgroup,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) ~
## HBB.genotype + Bloodgroup
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 340.3 353.6 -160.1 320.3 18
##
##
## Dispersion parameter for betabinomial family (): 73.2
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.2077 0.2429 -9.090 <2e-16 ***
## HBB.genotypeHbAC 0.1324 0.2895 0.457 0.6475
## HBB.genotypeHbAS 0.6401 0.2723 2.350 0.0188 *
## HBB.genotypeHbSC -0.2605 0.3805 -0.685 0.4935
## HBB.genotypeHbSS -0.1400 0.2701 -0.518 0.6043
## Bloodgroup1 0.5274 0.2269 2.324 0.0201 *
## Bloodgroup2 0.0228 0.2542 0.090 0.9285
## Bloodgroup3 -0.4202 0.3154 -1.332 0.1828
## Bloodgroup4 -0.1299 0.2264 -0.574 0.5662
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model assumptions shows slight deviation for residual vs predicted plots. Adding bloodgroup fixes this, but warns about a single potential outlier (see below).
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.4224, p-value = 0.194
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.142 0.330 Inf 0.554 2.35 1 0.457 1.0000
## HbAS / HbAA 1.897 0.517 Inf 0.961 3.74 1 2.350 0.0750
## HbSC / HbAA 0.771 0.293 Inf 0.298 1.99 1 -0.685 1.0000
## HbSS / HbAA 0.869 0.235 Inf 0.443 1.71 1 -0.518 1.0000
##
## Results are averaged over the levels of: Bloodgroup
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
kruskal.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` /
(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) ~
df$HBB.genotype)
##
## Kruskal-Wallis rank sum test
##
## data: df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) by df$HBB.genotype
## Kruskal-Wallis chi-squared = 7.8392, df = 4, p-value = 0.09765
pairwise.wilcox.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` /
(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`),
df$HBB.genotype,
p.adjust.method = "holm")
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) and df$HBB.genotype
##
## HbAA HbAC HbAS HbSC
## HbAC 1.00 - - -
## HbAS 0.16 1.00 - -
## HbSC 1.00 1.00 1.00 -
## HbSS 1.00 1.00 0.38 1.00
##
## P value adjustment method: holm
pairwise.wilcox.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` /
(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`),
df$HBB.genotype,
p.adjust.method = "bonferroni")
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) and df$HBB.genotype
##
## HbAA HbAC HbAS HbSC
## HbAC 1.00 - - -
## HbAS 0.16 1.00 - -
## HbSC 1.00 1.00 1.00 -
## HbSS 1.00 1.00 0.42 1.00
##
## P value adjustment method: bonferroni
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi`) ~
## HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 360.9 368.9 -174.4 348.9 22
##
##
## Dispersion parameter for betabinomial family (): 58.6
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.74999 0.12533 -5.984 2.18e-09 ***
## HBB.genotypeHbAC 0.02208 0.17683 0.125 0.901
## HBB.genotypeHbAS 0.05586 0.18661 0.299 0.765
## HBB.genotypeHbSC -1.20940 0.30539 -3.960 7.49e-05 ***
## HBB.genotypeHbSS -0.89456 0.16171 -5.532 3.17e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model assumptions are OK.
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.1843, p-value = 0.338
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.022 0.1810 Inf 0.657 1.590 1 0.125 1.0000
## HbAS / HbAA 1.057 0.1970 Inf 0.663 1.685 1 0.299 1.0000
## HbSC / HbAA 0.298 0.0911 Inf 0.139 0.640 1 -3.960 0.0003
## HbSS / HbAA 0.409 0.0661 Inf 0.273 0.612 1 -5.532 <.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).
df <- df %>%
mutate(sexual = `Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi`) %>%
mutate(asexual = `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi`) %>%
mutate(total = `Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi` + `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi`)
brm_model <- brm(
bf(
sexual | trials(total) ~ HBB.genotype
),
family = beta_binomial(link = "logit"),
data = df,
iter = 5000,
chains = 8,
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
## Family: beta_binomial
## Links: mu = logit
## Formula: sexual | trials(total) ~ HBB.genotype
## Data: df (Number of observations: 28)
## Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 20000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.75 0.15 -1.05 -0.47 1.00 12477 12788
## HBB.genotypeHbAC 0.02 0.21 -0.39 0.42 1.00 13809 13609
## HBB.genotypeHbAS 0.06 0.22 -0.38 0.48 1.00 14248 13657
## HBB.genotypeHbSC -1.27 0.38 -2.09 -0.58 1.00 15828 11027
## HBB.genotypeHbSS -0.88 0.19 -1.25 -0.51 1.00 13160 13965
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi 46.89 14.10 23.71 78.34 1.00 16739 14422
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)
The Bayesian estimated odds ratios for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio lower.HPD upper.HPD
## HbAC / HbAA 1.023 0.647 1.478
## HbAS / HbAA 1.059 0.652 1.564
## HbSC / HbAA 0.288 0.101 0.522
## HbSS / HbAA 0.413 0.269 0.576
##
## Point estimate displayed: median
## Results are back-transformed from the log odds ratio scale
## HPD interval probability: 0.95
The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi`,
`Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi`) ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi`, `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi`) ~
## HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 327.3 335.3 -157.6 315.3 22
##
##
## Dispersion parameter for betabinomial family (): 69.8
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.10038 0.12987 -8.473 < 2e-16 ***
## HBB.genotypeHbAC 0.02967 0.18261 0.162 0.870922
## HBB.genotypeHbAS 0.24697 0.18488 1.336 0.181596
## HBB.genotypeHbSC -1.02698 0.30657 -3.350 0.000809 ***
## HBB.genotypeHbSS -0.64371 0.16314 -3.946 7.96e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model assumptions are OK.
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.1618, p-value = 0.42
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.030 0.1880 Inf 0.653 1.63 1 0.162 1.0000
## HbAS / HbAA 1.280 0.2370 Inf 0.807 2.03 1 1.336 0.7264
## HbSC / HbAA 0.358 0.1100 Inf 0.167 0.77 1 -3.350 0.0032
## HbSS / HbAA 0.525 0.0857 Inf 0.350 0.79 1 -3.946 0.0003
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).
df <- df %>%
mutate(sexual = `Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi`) %>%
mutate(asexual = `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi`) %>%
mutate(total = `Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi` + `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi`)
brm_model <- brm(
bf(
sexual | trials(total) ~ HBB.genotype
),
family = beta_binomial(link = "logit"),
data = df,
iter = 5000,
chains = 8,
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
## Family: beta_binomial
## Links: mu = logit
## Formula: sexual | trials(total) ~ HBB.genotype
## Data: df (Number of observations: 28)
## Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 20000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.11 0.15 -1.43 -0.82 1.00 11892 12119
## HBB.genotypeHbAC 0.04 0.22 -0.37 0.48 1.00 13051 13455
## HBB.genotypeHbAS 0.26 0.22 -0.18 0.69 1.00 13628 13205
## HBB.genotypeHbSC -1.08 0.39 -1.90 -0.38 1.00 15222 11857
## HBB.genotypeHbSS -0.62 0.19 -0.99 -0.24 1.00 12353 13434
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi 54.25 17.02 26.72 92.62 1.00 16337 14642
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)
The Bayesian estimated odds ratios for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio lower.HPD upper.HPD
## HbAC / HbAA 1.043 0.646 1.547
## HbAS / HbAA 1.293 0.806 1.930
## HbSC / HbAA 0.350 0.131 0.646
## HbSS / HbAA 0.535 0.351 0.758
##
## Point estimate displayed: median
## Results are back-transformed from the log odds ratio scale
## HPD interval probability: 0.95
The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
Note: there are a number of missing values for some of the raw iRBC counts at gen 0 and the raw late stages at gen 0.
model <- glmmTMB(
cbind(`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`,
`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` - `Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`) ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`, `Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` -
## `Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`) ~ HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 287.7 294.0 -137.9 275.7 15
##
##
## Dispersion parameter for betabinomial family (): 5.06
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.95428 0.49939 -1.911 0.056016 .
## HBB.genotypeHbAC 0.41845 0.68680 0.609 0.542336
## HBB.genotypeHbAS -0.01763 0.65397 -0.027 0.978490
## HBB.genotypeHbSC -0.45791 1.02936 -0.445 0.656427
## HBB.genotypeHbSS -2.06863 0.62116 -3.330 0.000868 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model assumptions are not OK…
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.0033828, p-value = 0.016
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 0.306
## alternative hypothesis: two.sided
# plot residuals per group
# plotResiduals(sim_res, df$HBB.genotype)
The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.520 1.0400 Inf 0.2734 8.448 1 0.609 1.0000
## HbAS / HbAA 0.983 0.6430 Inf 0.1918 5.032 1 -0.027 1.0000
## HbSC / HbAA 0.633 0.6510 Inf 0.0484 8.274 1 -0.445 1.0000
## HbSS / HbAA 0.126 0.0785 Inf 0.0268 0.596 1 -3.330 0.0035
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
kruskal.test(df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi` /
df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` ~
df$HBB.genotype)
##
## Kruskal-Wallis rank sum test
##
## data: df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` by df$HBB.genotype
## Kruskal-Wallis chi-squared = 9.4844, df = 4, p-value = 0.05007
pairwise.wilcox.test(df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi` /
df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi`,
df$HBB.genotype,
p.adjust.method = "holm")
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` and df$HBB.genotype
##
## HbAA HbAC HbAS HbSC
## HbAC 1.00 - - -
## HbAS 1.00 1.00 - -
## HbSC 1.00 1.00 1.00 -
## HbSS 0.44 0.44 0.24 1.00
##
## P value adjustment method: holm
pairwise.wilcox.test(df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi` /
df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi`,
df$HBB.genotype,
p.adjust.method = "bonferroni")
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` and df$HBB.genotype
##
## HbAA HbAC HbAS HbSC
## HbAC 1.00 - - -
## HbAS 1.00 1.00 - -
## HbSC 1.00 1.00 1.00 -
## HbSS 0.49 0.49 0.24 1.00
##
## P value adjustment method: bonferroni
model <- glmmTMB(
cbind(`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`,
`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` - `Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`) ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`, `Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` -
## `Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`) ~ HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 284.7 292.7 -136.4 272.7 22
##
##
## Dispersion parameter for betabinomial family (): 42.7
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.2105 0.4598 -9.158 <2e-16 ***
## HBB.genotypeHbAC 1.0071 0.5414 1.860 0.0629 .
## HBB.genotypeHbAS 1.0262 0.5613 1.828 0.0675 .
## HBB.genotypeHbSC 1.2094 0.6411 1.886 0.0592 .
## HBB.genotypeHbSS 0.1628 0.5086 0.320 0.7489
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model assumptions show slight deviations for quantiles.
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.5013, p-value = 0.294
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0.84459, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 2.74 1.480 Inf 0.708 10.58 1 1.860 0.2515
## HbAS / HbAA 2.79 1.570 Inf 0.687 11.34 1 1.828 0.2701
## HbSC / HbAA 3.35 2.150 Inf 0.676 16.62 1 1.886 0.2369
## HbSS / HbAA 1.18 0.599 Inf 0.330 4.19 1 0.320 1.0000
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
kruskal.test(df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi` /
df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` ~
df$HBB.genotype)
##
## Kruskal-Wallis rank sum test
##
## data: df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` by df$HBB.genotype
## Kruskal-Wallis chi-squared = 7.117, df = 4, p-value = 0.1298
pairwise.wilcox.test(df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi` /
df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi`,
df$HBB.genotype,
p.adjust.method = "holm")
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` and df$HBB.genotype
##
## HbAA HbAC HbAS HbSC
## HbAC 1.00 - - -
## HbAS 1.00 1.00 - -
## HbSC 1.00 1.00 1.00 -
## HbSS 1.00 0.74 0.42 1.00
##
## P value adjustment method: holm
pairwise.wilcox.test(df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi` /
df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi`,
df$HBB.genotype,
p.adjust.method = "bonferroni")
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` and df$HBB.genotype
##
## HbAA HbAC HbAS HbSC
## HbAC 1.00 - - -
## HbAS 1.00 1.00 - -
## HbSC 1.00 1.00 1.00 -
## HbSS 1.00 0.82 0.42 1.00
##
## P value adjustment method: bonferroni
model <- glmmTMB(
cbind(`Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`,
`Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi` - `Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`, `Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi` -
## `Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 367.0 375.0 -177.5 355.0 22
##
##
## Dispersion parameter for betabinomial family (): 8.7
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.4511 0.3466 -4.186 2.83e-05 ***
## HBB.genotypeHbAC -0.2143 0.4916 -0.436 0.662965
## HBB.genotypeHbAS -0.5160 0.5411 -0.954 0.340311
## HBB.genotypeHbSC -0.6327 0.6963 -0.909 0.363563
## HBB.genotypeHbSS -1.5736 0.4610 -3.413 0.000642 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model assumptions are OK.
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.2606, p-value = 0.474
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 0.532
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 0.807 0.3970 Inf 0.2364 2.756 1 -0.436 1.0000
## HbAS / HbAA 0.597 0.3230 Inf 0.1545 2.306 1 -0.954 1.0000
## HbSC / HbAA 0.531 0.3700 Inf 0.0933 3.024 1 -0.909 1.0000
## HbSS / HbAA 0.207 0.0956 Inf 0.0655 0.656 1 -3.413 0.0026
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).
df <- df %>%
mutate(late = `Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`) %>%
mutate(total = `Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi`)
brm_model <- brm(
bf(
late | trials(total) ~ HBB.genotype
),
family = beta_binomial(link = "logit"),
data = df,
iter = 5000,
chains = 8,
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
## Family: beta_binomial
## Links: mu = logit
## Formula: late | trials(total) ~ HBB.genotype
## Data: df (Number of observations: 28)
## Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 20000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.46 0.39 -2.29 -0.72 1.00 12175 11604
## HBB.genotypeHbAC -0.21 0.55 -1.28 0.88 1.00 12093 12089
## HBB.genotypeHbAS -0.53 0.61 -1.77 0.64 1.00 12683 12317
## HBB.genotypeHbSC -0.77 0.84 -2.62 0.72 1.00 13861 11921
## HBB.genotypeHbSS -1.48 0.50 -2.42 -0.44 1.00 11510 12232
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi 7.74 2.40 3.76 13.09 1.00 12943 13439
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)
The Bayesian estimated odds ratios for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio lower.HPD upper.HPD
## HbAC / HbAA 0.812 0.17327 2.029
## HbAS / HbAA 0.593 0.09346 1.606
## HbSC / HbAA 0.492 0.00885 1.643
## HbSS / HbAA 0.224 0.06318 0.546
##
## Point estimate displayed: median
## Results are back-transformed from the log odds ratio scale
## HPD interval probability: 0.95
The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
There does not appear to be a strong relationship between parasitemia and genotypes.
A quick test (not included) showed that a normal binomial model is a poor fit for the data, so we opt for the beta-binomial one immediately.
model <- glmmTMB(
cbind(`Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`,
`Raw counts_RBC_CHOLINE_Gen0_0to5hpi` - `Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`) ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`, `Raw counts_RBC_CHOLINE_Gen0_0to5hpi` -
## `Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`) ~ HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 402.3 410.3 -195.2 390.3 22
##
##
## Dispersion parameter for betabinomial family (): 214
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.99277 0.32581 -15.324 <2e-16 ***
## HBB.genotypeHbAC 0.16185 0.43406 0.373 0.709
## HBB.genotypeHbAS 0.02599 0.47568 0.055 0.956
## HBB.genotypeHbSC 0.02415 0.58446 0.041 0.967
## HBB.genotypeHbSS 0.11327 0.36993 0.306 0.759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model assumptions are OK.
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.3572, p-value = 0.362
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.18 0.510 Inf 0.398 3.48 1 0.373 1.0000
## HbAS / HbAA 1.03 0.488 Inf 0.313 3.37 1 0.055 1.0000
## HbSC / HbAA 1.02 0.599 Inf 0.238 4.41 1 0.041 1.0000
## HbSS / HbAA 1.12 0.414 Inf 0.445 2.82 1 0.306 1.0000
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).
There does not appear to be a strong relationship between parasitemia and genotypes.
df <- df %>%
mutate(irbc = `Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`) %>%
mutate(total = `Raw counts_RBC_CHOLINE_Gen0_0to5hpi`)
brm_model <- brm(
bf(
irbc | trials(total) ~ HBB.genotype
),
family = beta_binomial(link = "logit"),
data = df,
iter = 5000,
chains = 8,
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
## Family: beta_binomial
## Links: mu = logit
## Formula: irbc | trials(total) ~ HBB.genotype
## Data: df (Number of observations: 28)
## Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 20000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -4.96 0.38 -5.78 -4.28 1.00 8330 8259
## HBB.genotypeHbAC 0.16 0.50 -0.83 1.17 1.00 9822 10027
## HBB.genotypeHbAS 0.01 0.55 -1.11 1.07 1.00 10337 10672
## HBB.genotypeHbSC -0.13 0.74 -1.76 1.15 1.00 10511 10074
## HBB.genotypeHbSS 0.16 0.42 -0.61 1.06 1.00 9190 9144
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi 161.89 49.45 79.95 271.70 1.00 13135 12156
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)
The Bayesian estimated odds ratios for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio lower.HPD upper.HPD
## HbAC / HbAA 1.169 0.2936 2.72
## HbAS / HbAA 1.013 0.1924 2.49
## HbSC / HbAA 0.928 0.0422 2.60
## HbSS / HbAA 1.148 0.4176 2.49
##
## Point estimate displayed: median
## Results are back-transformed from the log odds ratio scale
## HPD interval probability: 0.95
These odds ratios have a large overlap with 1 and do not indicate a strong association between parasitemia and genotype.
The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
There does not appear to be a strong relationship between reticulocyte proportion and Hbb genotype, except for HbSS (significant wald z, although the Bayesian credible interval is close to 1 [1.113; 4.87]), and HbSC (not significant) do show higher proportions. More (balanced) data might be able to make stronger conclusions.
A quick test (not included) showed that a normal binomial model is a poor fit for the data, so we opt for the beta-binomial one immediately.
model <- glmmTMB(
cbind(`Raw counts_Retics_Reticulocytes`,
`Raw counts_Retics_total RBCs` - `Raw counts_Retics_Reticulocytes`) ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Retics_Reticulocytes`, `Raw counts_Retics_total RBCs` -
## `Raw counts_Retics_Reticulocytes`) ~ HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 237.2 245.2 -112.6 225.2 22
##
##
## Dispersion parameter for betabinomial family (): 164
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.1328 0.2811 -14.704 <2e-16 ***
## HBB.genotypeHbAC -0.1458 0.4013 -0.363 0.7163
## HBB.genotypeHbAS 0.1174 0.4018 0.292 0.7702
## HBB.genotypeHbSC 0.8451 0.4195 2.014 0.0440 *
## HBB.genotypeHbSS 0.9382 0.3022 3.105 0.0019 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Some of these estimates appear significant, but when correcting for multiple testing this is no longer the case (except for the HbSS genotype). The magnitude of the effect size for SS and SC is about double, with fairly high standard errors.
Model assumptions are OK.
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.1131, p-value = 0.592
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 0.864 0.347 Inf 0.317 2.36 1 -0.363 1.0000
## HbAS / HbAA 1.125 0.452 Inf 0.412 3.07 1 0.292 1.0000
## HbSC / HbAA 2.328 0.977 Inf 0.816 6.64 1 2.014 0.1759
## HbSS / HbAA 2.555 0.772 Inf 1.201 5.44 1 3.105 0.0076
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
The estimated parasitemia per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).
df <- df %>%
mutate(retics = `Raw counts_Retics_Reticulocytes`) %>%
mutate(retics_total = `Raw counts_Retics_total RBCs`)
brm_model <- brm(
bf(
retics | trials(retics_total) ~ HBB.genotype
),
family = beta_binomial(link = "logit"),
data = df,
iter = 5000,
chains = 8,
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
## Family: beta_binomial
## Links: mu = logit
## Formula: retics | trials(retics_total) ~ HBB.genotype
## Data: df (Number of observations: 28)
## Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 20000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -4.13 0.33 -4.84 -3.53 1.00 7097 7228
## HBB.genotypeHbAC -0.14 0.46 -1.06 0.78 1.00 9165 9642
## HBB.genotypeHbAS 0.10 0.47 -0.84 1.03 1.00 9086 9955
## HBB.genotypeHbSC 0.76 0.52 -0.34 1.72 1.00 8993 10679
## HBB.genotypeHbSS 0.95 0.35 0.30 1.70 1.00 7707 8279
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi 124.61 39.84 60.38 215.20 1.00 12685 12196
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)
The Bayesian estimated odds ratios for the different genotype contrasts are as follows:
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio lower.HPD upper.HPD
## HbAC / HbAA 0.866 0.265 1.94
## HbAS / HbAA 1.110 0.284 2.44
## HbSC / HbAA 2.187 0.441 4.92
## HbSS / HbAA 2.545 1.113 4.87
##
## Point estimate displayed: median
## Results are back-transformed from the log odds ratio scale
## HPD interval probability: 0.95
These odds ratios are very close to 1 (HbSS), or they include it in their credible interval. The association between reticulocyte proportion and genotype cannot reliably be shown based on this data, but it does appear that HbSS (and perhaps HbSC) have higher reticulocyte counts.
The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response")
No random effects are needed since we do not have technical replicates here, unlike the evSCA.
Model assumptions are violated for the regular binomial model: there is clear evidence of overdispersion.
model <- glm(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
family = binomial, data = df)
summary(model)
##
## Call:
## glm(formula = cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
## `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
## family = binomial, data = df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.05716 0.02455 -83.811 < 2e-16 ***
## HBB.genotypeHbAC 0.22328 0.03366 6.633 3.3e-11 ***
## HBB.genotypeHbAS 0.60589 0.03096 19.571 < 2e-16 ***
## HBB.genotypeHbSC -0.53386 0.06498 -8.216 < 2e-16 ***
## HBB.genotypeHbSS -0.46398 0.03270 -14.187 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2281.83 on 27 degrees of freedom
## Residual deviance: 685.49 on 23 degrees of freedom
## AIC: 900.81
##
## Number of Fisher Scoring iterations: 4
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.2926, p-value < 2.2e-16
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.250 0.04210 Inf 1.140 1.370 1 6.633 <.0001
## HbAS / HbAA 1.833 0.05670 Inf 1.684 1.994 1 19.571 <.0001
## HbAS / HbAC 1.466 0.04370 Inf 1.352 1.590 1 12.848 <.0001
## HbSC / HbAA 0.586 0.03810 Inf 0.491 0.700 1 -8.216 <.0001
## HbSC / HbAC 0.469 0.03020 Inf 0.393 0.559 1 -11.753 <.0001
## HbSC / HbAS 0.320 0.02020 Inf 0.269 0.380 1 -18.077 <.0001
## HbSS / HbAA 0.629 0.02060 Inf 0.575 0.687 1 -14.187 <.0001
## HbSS / HbAC 0.503 0.01590 Inf 0.461 0.548 1 -21.756 <.0001
## HbSS / HbAS 0.343 0.00984 Inf 0.317 0.371 1 -37.291 <.0001
## HbSS / HbSC 1.072 0.06860 Inf 0.901 1.277 1 1.093 0.8101
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 5 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 5 estimates
## Tests are performed on the log odds ratio scale
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "sidak",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.250 0.0421 Inf 1.150 1.360 1 6.633 <.0001
## HbAS / HbAA 1.833 0.0567 Inf 1.697 1.980 1 19.571 <.0001
## HbSC / HbAA 0.586 0.0381 Inf 0.499 0.689 1 -8.216 <.0001
## HbSS / HbAA 0.629 0.0206 Inf 0.580 0.682 1 -14.187 <.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: sidak method for 4 tests
## Tests are performed on the log odds ratio scale
We can use a per-observation random effect to account for overdispersion. This seems to alleviate the problems.
df$obs <- seq_len(nrow(df))
model <- glmer(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype + (1 | obs),
family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~
## HBB.genotype + (1 | obs)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 332.2 340.2 -160.1 320.2 22
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.51780 -0.14454 -0.01208 0.13350 0.81015
##
## Random effects:
## Groups Name Variance Std.Dev.
## obs (Intercept) 0.0852 0.2919
## Number of obs: 28, groups: obs, 28
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.0860 0.1337 -15.605 < 2e-16 ***
## HBB.genotypeHbAC 0.2315 0.1883 1.229 0.21903
## HBB.genotypeHbAS 0.5637 0.1989 2.834 0.00460 **
## HBB.genotypeHbSC -0.3972 0.2545 -1.561 0.11861
## HBB.genotypeHbSS -0.4620 0.1599 -2.890 0.00385 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) HBB.HAC HBB.HAS HBB.HSC
## HBB.gntyHAC -0.710
## HBB.gntyHAS -0.672 0.477
## HBB.gntyHSC -0.525 0.373 0.353
## HBB.gntyHSS -0.836 0.593 0.562 0.439
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.1713, p-value = 0.446
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.260 0.2370 Inf 0.754 2.107 1 1.229 0.7343
## HbAS / HbAA 1.757 0.3500 Inf 1.021 3.023 1 2.834 0.0371
## HbAS / HbAC 1.394 0.2760 Inf 0.812 2.394 1 1.676 0.4489
## HbSC / HbAA 0.672 0.1710 Inf 0.336 1.346 1 -1.561 0.5228
## HbSC / HbAC 0.533 0.1350 Inf 0.267 1.066 1 -2.475 0.0962
## HbSC / HbAS 0.383 0.1000 Inf 0.187 0.782 1 -3.669 0.0023
## HbSS / HbAA 0.630 0.1010 Inf 0.407 0.974 1 -2.890 0.0315
## HbSS / HbAC 0.500 0.0795 Inf 0.324 0.771 1 -4.361 0.0001
## HbSS / HbAS 0.359 0.0615 Inf 0.225 0.572 1 -5.983 <.0001
## HbSS / HbSC 0.937 0.2190 Inf 0.496 1.773 1 -0.278 0.9987
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 5 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 5 estimates
## Tests are performed on the log odds ratio scale
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "sidak",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.260 0.237 Inf 0.788 2.015 1 1.229 0.6280
## HbAS / HbAA 1.757 0.350 Inf 1.071 2.884 1 2.834 0.0183
## HbSC / HbAA 0.672 0.171 Inf 0.357 1.267 1 -1.561 0.3965
## HbSS / HbAA 0.630 0.101 Inf 0.423 0.938 1 -2.890 0.0153
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: sidak method for 4 tests
## Tests are performed on the log odds ratio scale
Another approach to take the overdispersion into account is to use a beta binomial GLM, this time with a single dispersion parameter (instead of one per group like in the ex vivo assay). For consistency with the ex vivo analysis, we will opt for beta-binomial model. Note that here there is no need for a per-genotype dispersion factor.
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~
## HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 333.0 341.0 -160.5 321.0 22
##
##
## Dispersion parameter for betabinomial family (): 126
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.0539 0.1269 -16.179 < 2e-16 ***
## HBB.genotypeHbAC 0.2242 0.1720 1.304 0.19226
## HBB.genotypeHbAS 0.5495 0.1719 3.196 0.00139 **
## HBB.genotypeHbSC -0.3801 0.2667 -1.425 0.15408
## HBB.genotypeHbSS -0.4474 0.1600 -2.796 0.00518 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.243, p-value = 0.242
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.251 0.2150 Inf 0.783 2.000 1 1.304 0.6888
## HbAS / HbAA 1.732 0.2980 Inf 1.084 2.769 1 3.196 0.0121
## HbAS / HbAC 1.384 0.2280 Inf 0.884 2.167 1 1.979 0.2763
## HbSC / HbAA 0.684 0.1820 Inf 0.330 1.415 1 -1.425 0.6113
## HbSC / HbAC 0.546 0.1430 Inf 0.267 1.116 1 -2.307 0.1425
## HbSC / HbAS 0.395 0.1030 Inf 0.193 0.807 1 -3.549 0.0036
## HbSS / HbAA 0.639 0.1020 Inf 0.413 0.989 1 -2.796 0.0414
## HbSS / HbAC 0.511 0.0777 Inf 0.337 0.773 1 -4.419 0.0001
## HbSS / HbAS 0.369 0.0561 Inf 0.244 0.559 1 -6.559 <.0001
## HbSS / HbSC 0.935 0.2380 Inf 0.467 1.870 1 -0.265 0.9989
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 5 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 5 estimates
## Tests are performed on the log odds ratio scale
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "sidak",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.251 0.215 Inf 0.815 1.921 1 1.304 0.5743
## HbAS / HbAA 1.732 0.298 Inf 1.129 2.658 1 3.196 0.0056
## HbSC / HbAA 0.684 0.182 Inf 0.352 1.329 1 -1.425 0.4879
## HbSS / HbAA 0.639 0.102 Inf 0.429 0.952 1 -2.796 0.0206
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: sidak method for 4 tests
## Tests are performed on the log odds ratio scale
The univariate analyses already showed associations between HBB genotype and some of the clinical/demographic factors like blood group, reticulocytes, age and gender. However, accounting for these in the model does not improve overall fit as assessed by a likelihood ratio test of the complex vs simple model.
Moreover, due to the small sample size and the small and sparse groups, we fear we would not have an adequate amount of representative samples for each of the different levels to make any strong conclusions about the association with the sexual conversion rate.
The high collinearity between blood group and genotype might also pose a problem, but this is related to the unbalanced representation of blood groups across HBB genotypes (identifyability issues?).
The direction of estimates remains consistent regardless of which variables are added to the model, but magnitudes and p-value do fluctuate for the HbAS/HbAA and HbSS/HbAA contrasts. This is mainly a concern for the SCR (+Choline, Gen1 35-40hpi) comparison, where the estimated effects shrink for these two contrasts (P grows above 0.05) depending on which additional factors are added.
We therefore focus mainly on a descriptive modelling approach with simple parsimonous models with the single parameter of interest (Hbb genotype). That being said, this approach cannot fully exclude confounding or interaction effects, and sample size and power limitations should still be kept in mind when interpreting some of these GL(M)M results, with emphasis on effect sizes over exact p-values.
model_simple <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
The model assumptions are still OK for the more complex model.
Note that the difference between HbAS and HbAA is less pronounced in the full model compared to the genotype-only one, likely because of the strong correlation between blood group and genotype.
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype +
Bloodgroup +
log(Reticulocytes) + Gender + Symptomatic
# + scale(Age)
# + Village + Ethnicity # missing values for a few of the samples
,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~
## HBB.genotype + Bloodgroup + log(Reticulocytes) + Gender +
## Symptomatic
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 342.9 360.2 -158.5 316.9 15
##
##
## Dispersion parameter for betabinomial family (): 148
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.93846 0.19409 -9.987 <2e-16 ***
## HBB.genotypeHbAC 0.11535 0.20912 0.552 0.5812
## HBB.genotypeHbAS 0.40620 0.19476 2.086 0.0370 *
## HBB.genotypeHbSC -0.53981 0.29569 -1.826 0.0679 .
## HBB.genotypeHbSS -0.70566 0.28850 -2.446 0.0144 *
## Bloodgroup1 0.24984 0.19087 1.309 0.1906
## Bloodgroup2 -0.03578 0.21671 -0.165 0.8689
## Bloodgroup3 -0.30168 0.22902 -1.317 0.1878
## Bloodgroup4 -0.17577 0.20338 -0.864 0.3875
## log(Reticulocytes) 0.05181 0.13119 0.395 0.6929
## Gender1 0.06908 0.16017 0.431 0.6663
## Symptomatic1 -0.01213 0.14242 -0.085 0.9321
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.2419, p-value = 0.232
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
# pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.122 0.235 Inf 0.666 1.89 1 0.552 1.0000
## HbAS / HbAA 1.501 0.292 Inf 0.923 2.44 1 2.086 0.1480
## HbSC / HbAA 0.583 0.172 Inf 0.278 1.22 1 -1.826 0.2716
## HbSS / HbAA 0.494 0.142 Inf 0.240 1.02 1 -2.446 0.0578
##
## Results are averaged over the levels of: Bloodgroup, Gender, Symptomatic
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype +
Bloodgroup +
log(Reticulocytes) + Gender + Symptomatic
# + scale(Age)
# + Village + Ethnicity # missing values for a few of the samples
,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~
## HBB.genotype + Bloodgroup + log(Reticulocytes) + Gender +
## Symptomatic
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 342.9 360.2 -158.5 316.9 15
##
##
## Dispersion parameter for betabinomial family (): 148
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.93846 0.19409 -9.987 <2e-16 ***
## HBB.genotypeHbAC 0.11535 0.20912 0.552 0.5812
## HBB.genotypeHbAS 0.40620 0.19476 2.086 0.0370 *
## HBB.genotypeHbSC -0.53981 0.29569 -1.826 0.0679 .
## HBB.genotypeHbSS -0.70566 0.28850 -2.446 0.0144 *
## Bloodgroup1 0.24984 0.19087 1.309 0.1906
## Bloodgroup2 -0.03578 0.21671 -0.165 0.8689
## Bloodgroup3 -0.30168 0.22902 -1.317 0.1878
## Bloodgroup4 -0.17577 0.20338 -0.864 0.3875
## log(Reticulocytes) 0.05181 0.13119 0.395 0.6929
## Gender1 0.06908 0.16017 0.431 0.6663
## Symptomatic1 -0.01213 0.14242 -0.085 0.9321
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.2419, p-value = 0.232
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.122 0.2350 Inf 0.634 1.985 1 0.552 0.9818
## HbAS / HbAA 1.501 0.2920 Inf 0.882 2.553 1 2.086 0.2262
## HbAS / HbAC 1.338 0.2330 Inf 0.832 2.150 1 1.671 0.4518
## HbSC / HbAA 0.583 0.1720 Inf 0.260 1.306 1 -1.826 0.3586
## HbSC / HbAC 0.519 0.1600 Inf 0.224 1.206 1 -2.122 0.2105
## HbSC / HbAS 0.388 0.1090 Inf 0.181 0.832 1 -3.385 0.0064
## HbSS / HbAA 0.494 0.1420 Inf 0.225 1.085 1 -2.446 0.1033
## HbSS / HbAC 0.440 0.1270 Inf 0.200 0.967 1 -2.846 0.0359
## HbSS / HbAS 0.329 0.0864 Inf 0.161 0.673 1 -4.235 0.0002
## HbSS / HbSC 0.847 0.2320 Inf 0.401 1.789 1 -0.605 0.9743
##
## Results are averaged over the levels of: Bloodgroup, Gender, Symptomatic
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 5 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 5 estimates
## Tests are performed on the log odds ratio scale
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "sidak",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.122 0.235 Inf 0.667 1.89 1 0.552 0.9692
## HbAS / HbAA 1.501 0.292 Inf 0.924 2.44 1 2.086 0.1400
## HbSC / HbAA 0.583 0.172 Inf 0.279 1.22 1 -1.826 0.2452
## HbSS / HbAA 0.494 0.142 Inf 0.241 1.01 1 -2.446 0.0565
##
## Results are averaged over the levels of: Bloodgroup, Gender, Symptomatic
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: sidak method for 4 tests
## Tests are performed on the log odds ratio scale
There are strong correlations between genotype and blood group (already observed in univariate risk analysis). The other factors appear to be less correlated.
performance::check_collinearity(model)
anova(model, model_simple, test = "LRT")
The LRT does not show a significantly better model fit for the more complex model.
There is a strong association between Hbb genotype and reticulocytes, but reticulocyte count itself does not seem to be associated with SCR.
ggplot(df,
aes(x = HBB.genotype, y = log(Reticulocytes), color = HBB.genotype)) +
geom_boxplot() +
geom_jitter(width = 0.1)
ggplot(df, aes(x = log(Reticulocytes),
y = `Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` /
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`,
color = HBB.genotype,
fill = HBB.genotype
)) +
geom_jitter(height = 0.05, width = 0, alpha = 0.3)
Adding it as a variable to control for its potentially confounding effect results in a model that still meets all assumptions. However, sample size is quite small and groups are not balanced, so likely we do not have enough power to resolve both. Moreover, the effect of reticulocytes is not significant, and the direction and magnitude of the Hbb genotype effect stays consistent, although the p-value is no longer significant for HbSS/HbAA (the Hb/AS effect remains the same).
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype +
log(Reticulocytes),
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~
## HBB.genotype + log(Reticulocytes)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 334.8 344.1 -160.4 320.8 21
##
##
## Dispersion parameter for betabinomial family (): 127
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.04178 0.12954 -15.761 < 2e-16 ***
## HBB.genotypeHbAC 0.21718 0.17218 1.261 0.20718
## HBB.genotypeHbAS 0.55403 0.17167 3.227 0.00125 **
## HBB.genotypeHbSC -0.34388 0.27842 -1.235 0.21679
## HBB.genotypeHbSS -0.39856 0.19364 -2.058 0.03957 *
## log(Reticulocytes) -0.04705 0.10659 -0.441 0.65890
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.2398, p-value = 0.258
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
# pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "sidak",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.243 0.214 Inf 0.809 1.91 1 1.261 0.6049
## HbAS / HbAA 1.740 0.299 Inf 1.135 2.67 1 3.227 0.0050
## HbSC / HbAA 0.709 0.197 Inf 0.354 1.42 1 -1.235 0.6237
## HbSS / HbAA 0.671 0.130 Inf 0.414 1.09 1 -2.058 0.1491
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: sidak method for 4 tests
## Tests are performed on the log odds ratio scale
performance::check_collinearity(model)
anova(model, model_simple, test = "LRT")
In a model with just reticulocytes, its effect does appear to be significant.
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ log(Reticulocytes),
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~
## log(Reticulocytes)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 345.7 349.7 -169.8 339.7 25
##
##
## Dispersion parameter for betabinomial family (): 61.7
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.8735 0.1002 -18.700 < 2e-16 ***
## log(Reticulocytes) -0.3281 0.1038 -3.161 0.00157 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.6419, p-value = 0.056
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$Reticulocytes)
Checking for any confounding effect of blood group on the relation between SCR and genotype is difficult due to the small and unbalanced group sizes; some blood groups only contain a single genotype, and taken together with the small sample size would lead to subgroups with only 1 or 2 samples.
table(df$Bloodgroup, df$HBB.genotype)
##
## HbAA HbAC HbAS HbSC HbSS
## 0 1 2 2 1 7
## 1 0 0 0 0 4
## 2 0 2 1 0 0
## 3 2 0 0 0 1
## 4 2 1 1 1 0
ggplot(data = df %>% group_by(Bloodgroup) %>% count(HBB.genotype),
aes(x = Bloodgroup, y = n, fill = HBB.genotype)) +
geom_bar(stat = "identity", position = position_dodge()) +
ylab("Frequency") +
xlab("Blood group") +
scale_fill_viridis_d() +
theme_bw()
ggplot(df, aes(x = Group, y = SCR_CHOLINE_Gen1_35to40hpi, fill = Bloodgroup)) +
geom_boxplot(alpha = 0.8) +
geom_point(aes(fill = Bloodgroup), size = 4, shape = 21, position = position_jitterdodge()) +
theme_minimal()
In a model with only blood group and genotype, blood group itself is not significant. The effect of genotype is less pronounced though, with smaller magnitude and larger p-values for HbAS (but not for HbSS), similar to the model with all variables. The direction and magnitude of estimates remains similar.
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype + Bloodgroup,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~
## HBB.genotype + Bloodgroup
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 337.2 350.6 -158.6 317.2 18
##
##
## Dispersion parameter for betabinomial family (): 146
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.90934 0.16409 -11.636 < 2e-16 ***
## HBB.genotypeHbAC 0.09039 0.19217 0.470 0.638093
## HBB.genotypeHbAS 0.42523 0.18534 2.294 0.021769 *
## HBB.genotypeHbSC -0.49251 0.26450 -1.862 0.062598 .
## HBB.genotypeHbSS -0.65825 0.19107 -3.445 0.000571 ***
## Bloodgroup1 0.24787 0.18898 1.312 0.189649
## Bloodgroup2 0.02735 0.17268 0.158 0.874150
## Bloodgroup3 -0.26549 0.21600 -1.229 0.219037
## Bloodgroup4 -0.10952 0.15268 -0.717 0.473157
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.2499, p-value = 0.202
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
# pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)
contrast(emm, method = list("HbAC / HbAA" = AC - AA,
"HbAS / HbAA" = AS - AA,
"HbSC / HbAA" = SC - AA,
"HbSS / HbAA" = SS - AA),
adjust = "bonferroni",
type="response",
infer = c(TRUE, TRUE)
)
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.095 0.2100 Inf 0.677 1.769 1 0.470 1.0000
## HbAS / HbAA 1.530 0.2840 Inf 0.963 2.431 1 2.294 0.0871
## HbSC / HbAA 0.611 0.1620 Inf 0.316 1.183 1 -1.862 0.2504
## HbSS / HbAA 0.518 0.0989 Inf 0.321 0.834 1 -3.445 0.0023
##
## Results are averaged over the levels of: Bloodgroup
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 4 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
performance::check_collinearity(model)
The LRT prefers the simpler model with just genotype. So we are left with the choice of a simpler model (should be preferable due to small sample size), but we need to be transparent that a model that takes blood group into account would reduce the effect size of genotype on the sexual conversion rate.
anova(model, model_simple, test = "LRT")
A model with just blood group also shows some level of association with SCR, but they disappear when adjusting the p-values for multiple comparisons. So, as for genotype itself, the effects are borderline.
model <- glmmTMB(
cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ Bloodgroup,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~
## Bloodgroup
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 354.3 362.3 -171.1 342.3 22
##
##
## Dispersion parameter for betabinomial family (): 56.1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.18456 0.12120 -18.025 <2e-16 ***
## Bloodgroup1 -0.08303 0.25199 -0.329 0.742
## Bloodgroup2 0.53438 0.23845 2.241 0.025 *
## Bloodgroup3 -0.15956 0.28859 -0.553 0.580
## Bloodgroup4 0.24596 0.21329 1.153 0.249
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.7923, p-value = 0.034
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$Bloodgroup)
emm <- emmeans(model, ~ Bloodgroup)
pairs(emm, adjust="bonferroni", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null
## Bloodgroup1 / Bloodgroup0 0.920 0.232 Inf 0.454 1.87 1
## Bloodgroup2 / Bloodgroup0 1.706 0.407 Inf 0.874 3.33 1
## Bloodgroup2 / Bloodgroup1 1.854 0.562 Inf 0.792 4.34 1
## Bloodgroup3 / Bloodgroup0 0.853 0.246 Inf 0.379 1.92 1
## Bloodgroup3 / Bloodgroup1 0.926 0.318 Inf 0.353 2.43 1
## Bloodgroup3 / Bloodgroup2 0.500 0.167 Inf 0.196 1.28 1
## Bloodgroup4 / Bloodgroup0 1.279 0.273 Inf 0.703 2.33 1
## Bloodgroup4 / Bloodgroup1 1.390 0.394 Inf 0.627 3.08 1
## Bloodgroup4 / Bloodgroup2 0.749 0.203 Inf 0.350 1.61 1
## Bloodgroup4 / Bloodgroup3 1.500 0.475 Inf 0.617 3.65 1
## z.ratio p.value
## -0.329 1.0000
## 2.241 0.2502
## 2.038 0.4160
## -0.553 1.0000
## -0.223 1.0000
## -2.077 0.3781
## 1.153 1.0000
## 1.160 1.0000
## -1.062 1.0000
## 1.281 1.0000
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 10 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 10 tests
## Tests are performed on the log odds ratio scale
Comparing to the Kruskal Wallis and Wilcoxon rank-sum test:
kruskal.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` /
(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ df$Bloodgroup)
##
## Kruskal-Wallis rank sum test
##
## data: df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) by df$Bloodgroup
## Kruskal-Wallis chi-squared = 6.8139, df = 4, p-value = 0.1461
kruskal.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` /
(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ df$HBB.genotype)
##
## Kruskal-Wallis rank sum test
##
## data: df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) by df$HBB.genotype
## Kruskal-Wallis chi-squared = 16.512, df = 4, p-value = 0.002403
grateful::cite_packages(output = "table", out.dir = here(), cite.tidyverse = TRUE)